Convert HTML to text

I have forgotten where I copied this script from:

#!/bin/bash
#
# Usage: convert-html-to-md HTML2TEXT_SCRIPT FILE.html [FILE.html ...]
#
# Convert the specified HTML files into Markdown text-format equivalents
# in the current working directory. The file extension will be .md.txt.
#
# Requires the html2text.py Python script by Aaron Swartz to convert
# from HTML to Markdown text [www.aaronsw.com/2002/html2text/]. Its path
# is the first argument; each HTML file is fed to it on standard input.
#
# Environment:
#   PYTHON  interpreter used to run HTML2TEXT_SCRIPT (default: python)
#
# NOTE: two inputs with the same <title> map to the same output file and
# the later one overwrites the earlier one.

# 'set -e' is deliberately not used: failures are checked explicitly so
# that one bad input does not abort the rest of the batch.
set -uo pipefail

# Print a one-line usage summary to stderr.
usage() {
  printf 'Usage: %s HTML2TEXT_SCRIPT FILE.html [FILE.html ...]\n' \
    "${0##*/}" >&2
}

#######################################
# Replace every "unsafe" filename character in a string with a hyphen.
# Arguments: $1 - raw title text
# Outputs:   the sanitized text on stdout
#######################################
sanitize_title() {
  # Single-quoted so the shell leaves everything alone; tr itself decodes
  # '\\' (backslash) and '\t' (tab). The apostrophe is appended separately.
  local unsafe='`~!@#$%^&*()+={}|[]\\:;"<>?,/ \t'"'"

  # '[-*]' pads the replacement set with hyphens to the length of set 1.
  printf '%s\n' "$1" | tr "${unsafe}" '[-*]'
}

#######################################
# Print the text of the first <title> element of an HTML file.
# Line breaks are removed first so a title spanning several lines still
# matches; surrounding whitespace is trimmed.
# Arguments: $1 - path to the HTML file
# Outputs:   the title on stdout
# Returns:   0 if a non-empty title was found, 1 otherwise
#######################################
extract_title() (
  # The body is a subshell, so nocasematch does not leak to the caller.
  shopt -s nocasematch

  local html title
  # Kept in a variable: a quoted right-hand side of =~ would be literal.
  local re='<title([[:space:]][^>]*)?>([^<]*)</title>'

  html=$(tr -d '\n\r' < "$1") || return 1

  # The regex engine reports the leftmost match, i.e. the document's
  # first title rather than e.g. a later <title> inside inline SVG.
  [[ "${html}" =~ ${re} ]] || return 1
  title=${BASH_REMATCH[2]}

  # Trim leading, then trailing, whitespace.
  title=${title#"${title%%[![:space:]]*}"}
  title=${title%"${title##*[![:space:]]}"}

  [[ -n "${title}" ]] || return 1
  printf '%s\n' "${title}"
)

#######################################
# Work out the output name (without the .md.txt suffix) for an HTML file:
# the sanitized <title> if there is one, otherwise the original filename
# with its .html/.htm extension removed.
# Arguments: $1 - path to the HTML file
# Outputs:   the base name on stdout
#######################################
output_basename() {
  local src=$1
  local title base

  if title=$(extract_title "${src}") && [[ -n "${title}" ]]; then
    sanitize_title "${title}"
    return
  fi

  # If there's no title, then just use the original filename.
  base=${src##*/}
  base=${base%.html}
  base=${base%.htm}
  printf '%s\n' "${base}"
}

#######################################
# Entry point.
# Arguments: $1 - path to html2text.py; $2... - HTML files to convert
# Returns:   0 if every file converted, 1 if any failed, 2 on usage error
#######################################
main() {
  if (( $# < 2 )); then
    usage
    return 2
  fi

  local html2text=$1
  shift

  if [[ ! -r "${html2text}" ]]; then
    printf 'error: cannot read converter script: %s\n' "${html2text}" >&2
    return 1
  fi

  local python_cmd=${PYTHON:-python}
  local status=0
  local src name

  for src in "$@"; do
    if [[ ! -f "${src}" || ! -r "${src}" ]]; then
      printf 'warning: skipping unreadable file: %s\n' "${src}" >&2
      status=1
      continue
    fi

    name=$(output_basename "${src}")

    # Convert the HTML to Markdown.
    if ! "${python_cmd}" "${html2text}" < "${src}" > "${name}.md.txt"; then
      printf 'warning: conversion failed: %s\n' "${src}" >&2
      status=1
    fi
  done

  return "${status}"
}

main "$@"
Reddit"><span></span><span class="sharing-screen-reader-text">Click to share on Reddit (Opens in new window)</span></a></li><li class="share-print"><a rel="nofollow noopener noreferrer" data-shared="" class="share-print sd-button share-icon no-text" href="https://johnlaudun.org/20080508-convert-html-to-text/#print" target="_blank" title="Click to print"><span></span><span class="sharing-screen-reader-text">Click to print (Opens in new window)</span></a></li><li class="share-end"></li></ul></div></div></div><div class='sharedaddy sd-block sd-like jetpack-likes-widget-wrapper jetpack-likes-widget-unloaded' id='like-post-wrapper-33779968-1997-5f6de3afa849b' data-src='https://widgets.wp.com/likes/#blog_id=33779968&post_id=1997&origin=johnlaudun.org&obj_id=33779968-1997-5f6de3afa849b' data-name='like-post-frame-33779968-1997-5f6de3afa849b'><h3 class="sd-title">Like this:</h3><div class='likes-widget-placeholder post-likes-widget-placeholder' style='height: 55px;'><span class='button'><span>Like</span></span> <span class="loading">Loading...</span></div><span class='sd-text-color'></span><a class='sd-link-color'></a></div> <div id='jp-relatedposts' class='jp-relatedposts' > <h3 class="jp-relatedposts-headline"><em>Related</em></h3> </div> </div><!-- .entry-content --> <footer class="entry-meta"> Posted on <a href="https://johnlaudun.org/20080508-convert-html-to-text/" title="12:16" rel="bookmark"><time class="entry-date" datetime="2008-05-08T12:16:18-06:00" pubdate>2008 May 8</time></a><span class="byline"> by <span class="author vcard"><a class="url fn n" href="https://johnlaudun.org/author/johnlaudun/" title="View all posts by johnlaudun" rel="author">johnlaudun</a></span></span>. 
This entry was posted in <a href="https://johnlaudun.org/category/work/" rel="category tag">work</a> and tagged <a href="https://johnlaudun.org/tag/code/" rel="tag">code</a>, <a href="https://johnlaudun.org/tag/html/" rel="tag">html</a>, <a href="https://johnlaudun.org/tag/python/" rel="tag">python</a>. Bookmark the <a href="https://johnlaudun.org/20080508-convert-html-to-text/" title="Permalink to Convert HTML to text" rel="bookmark">permalink</a>. </footer><!-- .entry-meta --> </article><!-- #post-1997 --> <nav role="navigation" id="nav-below" class="site-navigation post-navigation"> <h1 class="assistive-text">Post navigation</h1> <div class="nav-previous"><a href="https://johnlaudun.org/20080506-of-heraclitus-and-wet-pants/" rel="prev"><span class="meta-nav">←</span> Of Heraclitus and Wet Pants</a></div> <div class="nav-next"><a href="https://johnlaudun.org/20080509-audiobooks-in-itunes/" rel="next">Audiobooks in iTunes <span class="meta-nav">→</span></a></div> </nav><!-- #nav-below --> </div><!-- #content .site-content --> </div><!-- #primary .content-area --> <div id="secondary" class="widget-area" role="complementary"> <aside id="text-3" class="widget widget_text"> <div class="textwidget"><h2>Recent Publications</h2> <p><a href="http://johnlaudun.org/docs/publications/Laudun-2019.pdf">Folklore as a Networked Economy: How a Recently-Invented-but-Traditional Artifact Reveals the Way Folkloric Production Has Always Worked.</a></p> <p><a href="https://culturalanalytics.org/article/11044-ted-talks-as-data">TED Talks as Data</a></p> <p><a href="http://johnlaudun.org/docs/publications/Laudun-2018-Proof.pdf">Trucks under Water: A Legend from the 2016 Flood</a></p> <p>And, of course <em><a href="http://amzn.to/1rf9wAT">the book.</a></em></p> </div> </aside><aside id="search-5" class="widget widget_search"> <form method="get" id="searchform" action="https://johnlaudun.org/" role="search"> <label for="s" class="assistive-text">Search</label> <input type="text" 
class="field" name="s" value="" id="s" placeholder="Search …" /> <input type="submit" class="submit" name="submit" id="searchsubmit" value="Search" /> </form> </aside><aside id="top-posts-2" class="widget widget_top-posts"><h1 class="widget-title">Top Posts & Pages</h1><ul> <li> <a href="https://johnlaudun.org/20170928-append-python-list-using-list-comprehension/" class="bump-view" data-bump-view="tp">Append a Python List Using a List Comprehension</a> </li> <li> <a href="https://johnlaudun.org/20180603-understanding-how-beautiful-soup-works/" class="bump-view" data-bump-view="tp">Understanding How Beautiful Soup Works</a> </li> <li> <a href="https://johnlaudun.org/20131228-ipython-notebook-keyboard-shortcuts/" class="bump-view" data-bump-view="tp">iPython Notebook Keyboard Shortcuts</a> </li> <li> <a href="https://johnlaudun.org/20181111-24-liter-daypack-comparison/" class="bump-view" data-bump-view="tp">24-liter Daypack Comparison</a> </li> <li> <a href="https://johnlaudun.org/20150512-installing-and-setting-pip-with-macports/" class="bump-view" data-bump-view="tp">Installing, and Setting, PIP with MacPorts</a> </li> <li> <a href="https://johnlaudun.org/20080321-word-wrap-filling-in-emacs/" class="bump-view" data-bump-view="tp">Word-wrap (filling) in Emacs</a> </li> <li> <a href="https://johnlaudun.org/20121207-streaming-audio-to-an-onkyo-receiver/" class="bump-view" data-bump-view="tp">Streaming Audio to an Onkyo Receiver</a> </li> <li> <a href="https://johnlaudun.org/20130221-text-analytics-101/" class="bump-view" data-bump-view="tp">Text Analytics 101</a> </li> <li> <a href="https://johnlaudun.org/20200702-quick-labels-with-pythons-f-string/" class="bump-view" data-bump-view="tp">Quick Labels with Python's f-string</a> </li> <li> <a href="https://johnlaudun.org/20150724-the-orbits-of-venus-and-earth/" class="bump-view" data-bump-view="tp">The Orbits of Venus and Earth</a> </li> </ul></aside> </div><!-- #secondary .widget-area --> </div><!-- #main .site-main 
--> <footer id="colophon" class="site-footer" role="contentinfo"> <div class="site-info"> <a href="http://wordpress.org/" rel="generator">Proudly powered by WordPress</a> Theme: Publish by <a href="http://kovshenin.com/" rel="designer">Konstantin Kovshenin</a>. </div><!-- .site-info --> </footer><!-- #colophon .site-footer --> </div><!-- #page .hfeed .site --> <script type="text/javascript"> window.WPCOM_sharing_counts = {"https:\/\/johnlaudun.org\/20080508-convert-html-to-text\/":1997}; </script> <script type='text/javascript' src='https://johnlaudun.org/wordpress/wp-content/themes/publish/js/small-menu.js?ver=20120206' id='small-menu-js'></script> <script type='text/javascript' src='https://johnlaudun.org/wordpress/wp-content/plugins/jetpack/vendor/automattic/jetpack-lazy-images/src/js/lazy-images.min.js?ver=1.0.0' id='jetpack-lazy-images-js'></script> <script type='text/javascript' src='https://johnlaudun.org/wordpress/wp-includes/js/wp-embed.min.js?ver=5.5.1' id='wp-embed-js'></script> <script type='text/javascript' src='https://johnlaudun.org/wordpress/wp-content/plugins/jetpack/_inc/build/postmessage.min.js?ver=8.9' id='postmessage-js'></script> <script type='text/javascript' src='https://johnlaudun.org/wordpress/wp-content/plugins/jetpack/_inc/build/jquery.jetpack-resize.min.js?ver=8.9' id='jetpack_resize-js'></script> <script type='text/javascript' src='https://johnlaudun.org/wordpress/wp-content/plugins/jetpack/_inc/build/likes/queuehandler.min.js?ver=8.9' id='jetpack_likes_queuehandler-js'></script> <script type='text/javascript' id='sharing-js-js-extra'> /* <![CDATA[ */ var sharing_js_options = {"lang":"en","counts":"1","is_stats_active":"1"}; /* ]]> */ </script> <script type='text/javascript' src='https://johnlaudun.org/wordpress/wp-content/plugins/jetpack/_inc/build/sharedaddy/sharing.min.js?ver=8.9' id='sharing-js-js'></script> <script type='text/javascript' id='sharing-js-js-after'> var windowOpen; jQuery( document.body ).on( 'click', 
'a.share-facebook', function() { // If there's another sharing window open, close it. if ( 'undefined' !== typeof windowOpen ) { windowOpen.close(); } windowOpen = window.open( jQuery( this ).attr( 'href' ), 'wpcomfacebook', 'menubar=1,resizable=1,width=600,height=400' ); return false; }); var windowOpen; jQuery( document.body ).on( 'click', 'a.share-twitter', function() { // If there's another sharing window open, close it. if ( 'undefined' !== typeof windowOpen ) { windowOpen.close(); } windowOpen = window.open( jQuery( this ).attr( 'href' ), 'wpcomtwitter', 'menubar=1,resizable=1,width=600,height=350' ); return false; }); var windowOpen; jQuery( document.body ).on( 'click', 'a.share-linkedin', function() { // If there's another sharing window open, close it. if ( 'undefined' !== typeof windowOpen ) { windowOpen.close(); } windowOpen = window.open( jQuery( this ).attr( 'href' ), 'wpcomlinkedin', 'menubar=1,resizable=1,width=580,height=450' ); return false; }); </script> <iframe src='https://widgets.wp.com/likes/master.html?ver=202039#ver=202039' scrolling='no' id='likes-master' name='likes-master' style='display:none;'></iframe> <div id='likes-other-gravatars'><div class="likes-text"><span>%d</span> bloggers like this:</div><ul class="wpl-avatars sd-like-gravatars"></ul></div> <script type='text/javascript' src='https://stats.wp.com/e-202039.js' async='async' defer='defer'></script> <script type='text/javascript'> _stq = window._stq || []; _stq.push([ 'view', {v:'ext',j:'1:8.9',blog:'33779968',post:'1997',tz:'-6',srv:'johnlaudun.org'} ]); _stq.push([ 'clickTrackerInit', '33779968', '1997' ]); </script> </body> </html>