Remove `&nbsp;` (a special character, i.e. an HTML entity)
HTML entities look like: `&nbsp;`, `&lt;` or `&amp;`
https://www.web2generators.com/html-based-tools/online-html-entities-encoder-and-decoder
Remove `&nbsp;`:

```python
# remove the hex "A0" character (U+00A0, the non-breaking space)
with open(cleaned_html, 'w') as cleaned_file:
    nonBreakSpace = u'\xa0'
    cleaned_file.write(str(soup).replace(nonBreakSpace, r' '))
    cleaned_file.close()
```