Convert HTML to text

I no longer remember where I copied this script from:

#!/bin/bash
# Usage: convert-html-to-md <html2text.py> <file.html>...
#
# Convert the specified HTML files into Markdown text-format equivalents
# in the current working directory. The file extension will be .md.txt.
# Requires the html2text.py Python script by Aaron Swartz to convert
# from HTML to Markdown text [www.aaronsw.com/2002/html2text/].
#
# Arguments:
#   $1     path to the html2text.py script
#   $2...  HTML files to convert
# Environment:
#   PYTHON  interpreter used to run html2text.py (default: python)
# Exit status:
#   0 on success, 1 if any file could not be read or converted,
#   2 on a usage error.

# Replace every "unsafe" filename character with a hyphen.
# Reads the candidate name on stdin and writes the safe name to stdout.
# Characters that are painful to quote are given as octal escapes:
# \140 backtick, \134 backslash, \042 double quote, \047 single quote.
sanitize_filename() {
  tr '\140~!@#$%^&*()+={}|[]\134:;\042\047<>?,/ \t' '[-*]'
}

# Print the text of the first <title> element of the HTML file $1, or
# nothing when the file has no title. The body is a ( ) subshell so that
# enabling nocasematch does not leak into the caller's shell.
extract_title() (
  shopt -s nocasematch

  # In case the title element spans multiple lines, the entire file is
  # first collapsed to a single line before the pattern is applied.
  html=$(tr -d '\n\r' < "$1") || exit

  # Kept in a variable so the regex metacharacters need no shell quoting.
  title_re='<title[^>]*>([^<]*)</title>'
  if [[ "$html" =~ $title_re ]]; then
    printf '%s\n' "${BASH_REMATCH[1]}"
  fi
)

# Convert each HTML file named after the html2text.py path ($1) into
# "<title>.md.txt" in the current working directory.
main() {
  if (( $# < 2 )); then
    printf 'Usage: %s <html2text.py> <file.html>...\n' "${0##*/}" >&2
    return 2
  fi

  local html2text=$1
  shift

  local src title status=0
  # Iterate over "$@" rather than testing -n "$1", so an empty argument
  # is reported and skipped instead of silently ending the run.
  for src in "$@"; do
    if [[ ! -r "$src" ]]; then
      printf '%s: cannot read %s\n' "${0##*/}" "$src" >&2
      status=1
      continue
    fi

    # Use the contents of the title element for the filename.
    title=$(extract_title "$src" | sanitize_filename)

    # If there's no title, then just use the original filename.
    if [[ -z "$title" ]]; then
      title=${src##*/}
      title=${title%.html}
    fi

    # Convert the HTML to Markdown.
    "${PYTHON:-python}" "$html2text" < "$src" > "${title}.md.txt" || status=1
  done

  return "$status"
}

main "$@"
(Opens in new window)</span></a></li><li class="share-google-plus-1"><a rel="nofollow" data-shared="sharing-google-1997" class="share-google-plus-1 sd-button share-icon no-text" href="http://johnlaudun.org/20080508-convert-html-to-text/?share=google-plus-1" target="_blank" title="Click to share on Google+"><span></span><span class="sharing-screen-reader-text">Click to share on Google+ (Opens in new window)</span></a></li><li class="share-email"><a rel="nofollow" data-shared="" class="share-email sd-button share-icon no-text" href="http://johnlaudun.org/20080508-convert-html-to-text/?share=email" target="_blank" title="Click to email this to a friend"><span></span><span class="sharing-screen-reader-text">Click to email this to a friend (Opens in new window)</span></a></li><li class="share-print"><a rel="nofollow" data-shared="" class="share-print sd-button share-icon no-text" href="http://johnlaudun.org/20080508-convert-html-to-text/#print" target="_blank" title="Click to print"><span></span><span class="sharing-screen-reader-text">Click to print (Opens in new window)</span></a></li><li class="share-end"></li></ul></div></div></div> <div id='jp-relatedposts' class='jp-relatedposts' > <h3 class="jp-relatedposts-headline"><em>Related</em></h3> </div> </div><!-- .entry-content --> <footer class="entry-meta"> Posted on <a href="http://johnlaudun.org/20080508-convert-html-to-text/" title="12:16" rel="bookmark"><time class="entry-date" datetime="2008-05-08T12:16:18+00:00" pubdate>2008 May 8</time></a><span class="byline"> by <span class="author vcard"><a class="url fn n" href="http://johnlaudun.org/author/johnlaudun/" title="View all posts by johnlaudun" rel="author">johnlaudun</a></span></span>. 
This entry was posted in <a href="http://johnlaudun.org/category/work/" rel="category tag">work</a> and tagged <a href="http://johnlaudun.org/tag/code/" rel="tag">code</a>, <a href="http://johnlaudun.org/tag/html/" rel="tag">html</a>, <a href="http://johnlaudun.org/tag/python/" rel="tag">python</a>. Bookmark the <a href="http://johnlaudun.org/20080508-convert-html-to-text/" title="Permalink to Convert HTML to text" rel="bookmark">permalink</a>. </footer><!-- .entry-meta --> </article><!-- #post-1997 --> <nav role="navigation" id="nav-below" class="site-navigation post-navigation"> <h1 class="assistive-text">Post navigation</h1> <div class="nav-previous"><a href="http://johnlaudun.org/20080506-of-heraclitus-and-wet-pants/" rel="prev"><span class="meta-nav">←</span> Of Heraclitus and Wet Pants</a></div> <div class="nav-next"><a href="http://johnlaudun.org/20080509-audiobooks-in-itunes/" rel="next">Audiobooks in iTunes <span class="meta-nav">→</span></a></div> </nav><!-- #nav-below --> </div><!-- #content .site-content --> </div><!-- #primary .content-area --> <div id="secondary" class="widget-area" role="complementary"> <aside id="search-5" class="widget widget_search"> <form method="get" id="searchform" action="http://johnlaudun.org/" role="search"> <label for="s" class="assistive-text">Search</label> <input type="text" class="field" name="s" value="" id="s" placeholder="Search …" /> <input type="submit" class="submit" name="submit" id="searchsubmit" value="Search" /> </form> </aside><aside id="text-3" class="widget widget_text"> <div class="textwidget"><a href="http://johnlaudun.org/boat/" rel="attachment wp-att-7877"><img src="https://i0.wp.com/media.johnlaudun.org.s3.amazonaws.com/wordpress/media/2016/01/ACB-cover-small-103x150.jpeg?resize=103%2C150" alt="The Amazing Crawfish Boat" data-recalc-dims="1" /></a> <p style="line-height:1.1 "><small><em>The Amazing Crawfish Boat</em> is available at your favorite bookseller (both <a 
href="http://amzn.to/1rf9wAT">Amazon</a> and <a href="http://www.barnesandnoble.com/w/the-amazing-crawfish-boat-john-laudun/1121843205?ean=9781496804204">B&N</a>). I have also released some additional <em>free</em> materials: audio versions of some of the chapters and photos — all available for download. Details are available on the <a href="http://johnlaudun.org/boat/">book’s page</a>.</small></p></div> </aside><aside id="top-posts-2" class="widget widget_top-posts"><h1 class="widget-title">Top Posts</h1><ul> <li> <a href="http://johnlaudun.org/20131228-ipython-notebook-keyboard-shortcuts/" class="bump-view" data-bump-view="tp"> iPython Notebook Keyboard Shortcuts </a> </li> <li> <a href="http://johnlaudun.org/20150512-installing-and-setting-pip-with-macports/" class="bump-view" data-bump-view="tp"> Installing, and Setting, PIP with MacPorts </a> </li> <li> <a href="http://johnlaudun.org/20170228-open-source-tools-for-nlp/" class="bump-view" data-bump-view="tp"> Open Source Tools for NLP </a> </li> <li> <a href="http://johnlaudun.org/20130126-nltk-stopwords/" class="bump-view" data-bump-view="tp"> NLTK and Stopwords </a> </li> <li> <a href="http://johnlaudun.org/20121207-streaming-audio-to-an-onkyo-receiver/" class="bump-view" data-bump-view="tp"> Streaming Audio to an Onkyo Receiver </a> </li> </ul></aside> </div><!-- #secondary .widget-area --> </div><!-- #main .site-main --> <footer id="colophon" class="site-footer" role="contentinfo"> <div class="site-info"> <a href="http://wordpress.org/" rel="generator">Proudly powered by WordPress</a> Theme: Publish by <a href="http://kovshenin.com/" rel="designer">Konstantin Kovshenin</a>. 
</div><!-- .site-info --> </footer><!-- #colophon .site-footer --> </div><!-- #page .hfeed .site --> <div style="display:none"> </div> <script> jQuery(document).ready(function () { jQuery.post('http://johnlaudun.org?ga_action=googleanalytics_get_script', {action: 'googleanalytics_get_script'}, function(response) { var F = new Function ( response ); return( F() ); }); }); </script> <script type="text/javascript"> window.WPCOM_sharing_counts = {"http:\/\/johnlaudun.org\/20080508-convert-html-to-text\/":1997}; </script> <div id="sharing_email" style="display: none;"> <form action="/20080508-convert-html-to-text/" method="post"> <label for="target_email">Send to Email Address</label> <input type="email" name="target_email" id="target_email" value="" /> <label for="source_name">Your Name</label> <input type="text" name="source_name" id="source_name" value="" /> <label for="source_email">Your Email Address</label> <input type="email" name="source_email" id="source_email" value="" /> <input type="text" id="jetpack-source_f_name" name="source_f_name" class="input" value="" size="25" autocomplete="off" title="This field is for validation and should not be changed" /> <script>jQuery( document ).ready( function(){ document.getElementById('jetpack-source_f_name').value = '' });</script> <img style="float: right; display: none" class="loading" src="http://johnlaudun.org/wordpress/wp-content/plugins/jetpack/modules/sharedaddy/images/loading.gif" alt="loading" width="16" height="16" /> <input type="submit" value="Send Email" class="sharing_send" /> <a rel="nofollow" href="#cancel" class="sharing_cancel">Cancel</a> <div class="errors errors-1" style="display: none;"> Post was not sent - check your email addresses! </div> <div class="errors errors-2" style="display: none;"> Email check failed, please try again </div> <div class="errors errors-3" style="display: none;"> Sorry, your blog cannot share posts by email. 
</div> </form> </div> <script type='text/javascript' src='http://johnlaudun.org/wordpress/wp-content/plugins/jetpack/modules/photon/photon.js?ver=20130122'></script> <script type='text/javascript' src='https://s0.wp.com/wp-content/js/devicepx-jetpack.js?ver=201742'></script> <script type='text/javascript' src='http://s.gravatar.com/js/gprofiles.js?ver=2017Octaa'></script> <script type='text/javascript'> /* <![CDATA[ */ var WPGroHo = {"my_hash":""}; /* ]]> */ </script> <script type='text/javascript' src='http://johnlaudun.org/wordpress/wp-content/plugins/jetpack/modules/wpgroho.js?ver=4.8.2'></script> <script type='text/javascript' src='http://johnlaudun.org/wordpress/wp-content/themes/publish/js/small-menu.js?ver=20120206'></script> <script type='text/javascript' src='http://johnlaudun.org/wordpress/wp-includes/js/wp-embed.min.js?ver=4.8.2'></script> <script type='text/javascript'> /* <![CDATA[ */ var sharing_js_options = {"lang":"en","counts":"1"}; /* ]]> */ </script> <script type='text/javascript' src='http://johnlaudun.org/wordpress/wp-content/plugins/jetpack/modules/sharedaddy/sharing.js?ver=5.3'></script> <script type='text/javascript'> var windowOpen; jQuery( document.body ).on( 'click', 'a.share-facebook', function() { // If there's another sharing window open, close it. if ( 'undefined' !== typeof windowOpen ) { windowOpen.close(); } windowOpen = window.open( jQuery( this ).attr( 'href' ), 'wpcomfacebook', 'menubar=1,resizable=1,width=600,height=400' ); return false; }); var windowOpen; jQuery( document.body ).on( 'click', 'a.share-twitter', function() { // If there's another sharing window open, close it. if ( 'undefined' !== typeof windowOpen ) { windowOpen.close(); } windowOpen = window.open( jQuery( this ).attr( 'href' ), 'wpcomtwitter', 'menubar=1,resizable=1,width=600,height=350' ); return false; }); var windowOpen; jQuery( document.body ).on( 'click', 'a.share-linkedin', function() { // If there's another sharing window open, close it. 
if ( 'undefined' !== typeof windowOpen ) { windowOpen.close(); } windowOpen = window.open( jQuery( this ).attr( 'href' ), 'wpcomlinkedin', 'menubar=1,resizable=1,width=580,height=450' ); return false; }); var windowOpen; jQuery( document.body ).on( 'click', 'a.share-google-plus-1', function() { // If there's another sharing window open, close it. if ( 'undefined' !== typeof windowOpen ) { windowOpen.close(); } windowOpen = window.open( jQuery( this ).attr( 'href' ), 'wpcomgoogle-plus-1', 'menubar=1,resizable=1,width=480,height=550' ); return false; }); </script> <script type='text/javascript' src='https://stats.wp.com/e-201742.js' async defer></script> <script type='text/javascript'> _stq = window._stq || []; _stq.push([ 'view', {v:'ext',j:'1:5.3',blog:'33779968',post:'1997',tz:'-6',srv:'johnlaudun.org'} ]); _stq.push([ 'clickTrackerInit', '33779968', '1997' ]); </script> </body> </html>