// Sample HTML fed to the converter: one fragment per construct under test.
// Every fragment except the last ends with " <br>".
var str = [
  "<h1>h1</h1> <br>",
  "<h2>h2</h2> <br>",
  "<h3>h3</h3> <br>",
  "text outside everything <br>",
  "<h2>(and another element!)</h2> <br>",
  "<img src='http://example.com/example.png'> <br>",
  "<a href='http://google.com'>a link!</a> <br>",
  "<ul><li>item 1</li><li>item 2</li><li>item 3</li></ul> <br>",
  "<ol><li>item 1</li><li>item 2</li><li>item 3</li></ol> <br>",
  "<strong>BOLD TEXT</strong> and <i>ITALICISED TEXT</i> <br>",
  "<blockquote>blockquote</blockquote>"
].join("");

// Parse the sample as an HTML document; only the body's direct children are
// converted.
var doc = new DOMParser().parseFromString(str, 'text/html');
var childnodes = doc.body.childNodes;

// Markdown output; converted nodes are appended to this string.
var markdown = '';
/**
 * Per-tag Markdown converters, keyed by lower-case tag name.
 *
 * Every converter receives `data` = { curEl, html, tag, i } and returns a
 * Markdown fragment. `a`, `ul`, `ol`, `strong` and `i` return the complete
 * Markdown for the element; the remaining converters return only a prefix
 * (or, for `img`, the whole image) and the caller appends the element's
 * innerHTML after it.
 */
var conversions = (function () {
  // Reads an attribute, treating a missing one as an empty string.
  function attribute(el, name) {
    var value = el.getAttribute(name);
    return value === null || value === undefined ? '' : value;
  }

  // Builds one Markdown line per element child of the list.
  // Text and comment nodes between the items have no innerHTML and would
  // otherwise be rendered as "undefined", so they are skipped.
  function listLines(data, markerFor) {
    // `data.curEl` is the list element itself; the index lookup is kept for
    // callers that only supply `i`.
    var listEl = data.curEl || childnodes[data.i];
    var nodes = listEl.childNodes;
    var lines = '';
    var position = 0;
    for (var x = 0; x < nodes.length; x++) {
      if (nodes[x].nodeType !== 1) {
        continue;
      }
      position++;
      lines += markerFor(position) + nodes[x].innerHTML + "\n";
    }
    return lines;
  }

  return {
    br: function (data) {
      return '\n\n';
    },
    h1: function (data) {
      return '# ';
    },
    h2: function (data) {
      return '## ';
    },
    h3: function (data) {
      return '### ';
    },
    hr: function (data) {
      return '---\n';
    },
    blockquote: function (data) {
      return '> ';
    },
    // Images become ![alt](src); a missing alt or src is left empty.
    img: function (data) {
      return "![" + attribute(data.curEl, 'alt') + "](" + attribute(data.curEl, 'src') + ")";
    },
    // An anchor without an href has no target, so only its content is kept.
    a: function (data) {
      var href = data.curEl.getAttribute('href');
      if (href === null || href === undefined) {
        return data.html;
      }
      return "[" + data.html + "](" + href + ")";
    },
    ul: function (data) {
      return listLines(data, function () {
        return "- ";
      });
    },
    ol: function (data) {
      return listLines(data, function (position) {
        return position + ". ";
      });
    },
    strong: function (data) {
      return "**" + data.html + "**";
    },
    i: function (data) {
      return "*" + data.html + "*";
    }
  };
})();
/**
 * Converts one node to Markdown and appends the result to the shared
 * `markdown` string.
 *
 * @param {Node} curEl - The node being converted.
 * @param {string|undefined} html - The node's innerHTML.
 * @param {string|undefined} tag - The node's tagName; undefined for nodes
 *   that are not elements (e.g. text nodes).
 * @param {number} i - Index of the node in `childnodes`.
 */
function convertToMarkdown(curEl, html, tag, i) {
  // Nodes without a tag name (text nodes) are copied through as plain text.
  if (tag === undefined || tag === null) {
    markdown += curEl.textContent;
    return;
  }

  var name = tag.toLowerCase();

  // Elements with no converter of their own fall back to their text content
  // instead of failing on a missing function. The own-property check keeps
  // inherited Object members (e.g. "constructor") from being called.
  if (!Object.prototype.hasOwnProperty.call(conversions, name)) {
    markdown += curEl.textContent;
    return;
  }

  var converted = conversions[name]({
    curEl: curEl,
    html: html,
    tag: name,
    i: i
  });

  // These converters already include the element's content in what they
  // return; every other converter returns a prefix that the content follows.
  var includesContent = ['ul', 'ol', 'i', 'strong', 'a'].indexOf(name) > -1;
  markdown += converted + (includesContent ? '' : html);
}
// Convert each node in `childnodes` in order, then log the accumulated
// Markdown.
for (var i = 0; i < childnodes.length; i++) {
  var curEl = childnodes[i];
  convertToMarkdown(curEl, curEl.innerHTML, curEl.tagName, i);
}
console.log(markdown);
// Sample HTML fed to the converter, built as a single concatenation so the
// fragments read top to bottom in the order they are converted.
var str =
  "<h1>h1</h1> <br>" +
  "<h2>h2</h2> <br>" +
  "<h3>h3</h3> <br>" +
  "text outside everything <br>" +
  "<h2>(and another element!)</h2> <br>" +
  "<img src='http://example.com/example.png'> <br>" +
  "<a href='http://google.com'>a link!</a> <br>" +
  "<ul><li>item 1</li><li>item 2</li><li>item 3</li></ul> <br>" +
  "<ol><li>item 1</li><li>item 2</li><li>item 3</li></ol> <br>" +
  "<strong>BOLD TEXT</strong> and <i>ITALICISED TEXT</i> <br>" +
  "<blockquote>blockquote</blockquote>";

// The sample is parsed as a full HTML document; the converter works on the
// direct children of its body.
var doc = new DOMParser().parseFromString(str, 'text/html');
var childnodes = doc.body.childNodes;

// Output buffer that the converted Markdown is appended to.
var markdown = '';
/**
 * Per-tag Markdown converters, keyed by lower-case tag name.
 *
 * Every converter receives `data` = { curEl, html, tag, i } and returns a
 * Markdown fragment. `a`, `ul`, `ol`, `strong` and `i` return the complete
 * Markdown for the element; the remaining converters return only a prefix
 * (or, for `img`, the whole image) and the caller appends the element's
 * innerHTML after it.
 */
var conversions = (function () {
  // Reads an attribute, treating a missing one as an empty string.
  function attribute(el, name) {
    var value = el.getAttribute(name);
    return value === null || value === undefined ? '' : value;
  }

  // Builds one Markdown line per element child of the list.
  // Text and comment nodes between the items have no innerHTML and would
  // otherwise be rendered as "undefined", so they are skipped.
  function listLines(data, markerFor) {
    // `data.curEl` is the list element itself; the index lookup is kept for
    // callers that only supply `i`.
    var listEl = data.curEl || childnodes[data.i];
    var nodes = listEl.childNodes;
    var lines = '';
    var position = 0;
    for (var x = 0; x < nodes.length; x++) {
      if (nodes[x].nodeType !== 1) {
        continue;
      }
      position++;
      lines += markerFor(position) + nodes[x].innerHTML + "\n";
    }
    return lines;
  }

  return {
    br: function (data) {
      return '\n\n';
    },
    h1: function (data) {
      return '# ';
    },
    h2: function (data) {
      return '## ';
    },
    h3: function (data) {
      return '### ';
    },
    hr: function (data) {
      return '---\n';
    },
    blockquote: function (data) {
      return '> ';
    },
    // Images become ![alt](src); a missing alt or src is left empty.
    img: function (data) {
      return "![" + attribute(data.curEl, 'alt') + "](" + attribute(data.curEl, 'src') + ")";
    },
    // An anchor without an href has no target, so only its content is kept.
    a: function (data) {
      var href = data.curEl.getAttribute('href');
      if (href === null || href === undefined) {
        return data.html;
      }
      return "[" + data.html + "](" + href + ")";
    },
    ul: function (data) {
      return listLines(data, function () {
        return "- ";
      });
    },
    ol: function (data) {
      return listLines(data, function (position) {
        return position + ". ";
      });
    },
    strong: function (data) {
      return "**" + data.html + "**";
    },
    i: function (data) {
      return "*" + data.html + "*";
    }
  };
})();
/**
 * Converts one node to Markdown and appends the result to the shared
 * `markdown` string.
 *
 * @param {Node} curEl - The node being converted.
 * @param {string|undefined} html - The node's innerHTML.
 * @param {string|undefined} tag - The node's tagName; undefined for nodes
 *   that are not elements (e.g. text nodes).
 * @param {number} i - Index of the node in `childnodes`.
 */
function convertToMarkdown(curEl, html, tag, i) {
  // Nodes without a tag name (text nodes) are copied through as plain text.
  if (tag === undefined || tag === null) {
    markdown += curEl.textContent;
    return;
  }

  var name = tag.toLowerCase();

  // Elements with no converter of their own fall back to their text content
  // instead of failing on a missing function. The own-property check keeps
  // inherited Object members (e.g. "constructor") from being called.
  if (!Object.prototype.hasOwnProperty.call(conversions, name)) {
    markdown += curEl.textContent;
    return;
  }

  var converted = conversions[name]({
    curEl: curEl,
    html: html,
    tag: name,
    i: i
  });

  // These converters already include the element's content in what they
  // return; every other converter returns a prefix that the content follows.
  var includesContent = ['ul', 'ol', 'i', 'strong', 'a'].indexOf(name) > -1;
  markdown += converted + (includesContent ? '' : html);
}
// Hand every node in `childnodes` to the converter, first to last, then
// print the Markdown that was built up.
var i = 0;
while (i < childnodes.length) {
  var curEl = childnodes[i];
  var html = curEl.innerHTML;
  var tag = curEl.tagName;
  convertToMarkdown(curEl, html, tag, i);
  i++;
}
console.log(markdown);
// Sample HTML fed to the converter: one fragment per construct under test.
// Every fragment except the last ends with " <br>".
var str = [
  "<h1>h1</h1> <br>",
  "<h2>h2</h2> <br>",
  "<h3>h3</h3> <br>",
  "text outside everything <br>",
  "<h2>(and another element!)</h2> <br>",
  "<img src='http://example.com/example.png'> <br>",
  "<a href='http://google.com'>a link!</a> <br>",
  "<ul><li>item 1</li><li>item 2</li><li>item 3</li></ul> <br>",
  "<ol><li>item 1</li><li>item 2</li><li>item 3</li></ol> <br>",
  "<strong>BOLD TEXT</strong> and <i>ITALICISED TEXT</i> <br>",
  "<blockquote>blockquote</blockquote>"
].join("");

// Parse the sample as an HTML document; only the body's direct children are
// converted.
var doc = new DOMParser().parseFromString(str, 'text/html');
var childnodes = doc.body.childNodes;

// Markdown output; converted nodes are appended to this string.
var markdown = '';
/**
 * Per-tag Markdown converters, keyed by lower-case tag name.
 *
 * Every converter receives `data` = { curEl, html, tag, i } and returns a
 * Markdown fragment. `a`, `ul`, `ol`, `strong` and `i` return the complete
 * Markdown for the element; the remaining converters return only a prefix
 * (or, for `img`, the whole image) and the caller appends the element's
 * innerHTML after it.
 */
var conversions = (function () {
  // Reads an attribute, treating a missing one as an empty string.
  function attribute(el, name) {
    var value = el.getAttribute(name);
    return value === null || value === undefined ? '' : value;
  }

  // Builds one Markdown line per element child of the list.
  // Text and comment nodes between the items have no innerHTML and would
  // otherwise be rendered as "undefined", so they are skipped.
  function listLines(data, markerFor) {
    // `data.curEl` is the list element itself; the index lookup is kept for
    // callers that only supply `i`.
    var listEl = data.curEl || childnodes[data.i];
    var nodes = listEl.childNodes;
    var lines = '';
    var position = 0;
    for (var x = 0; x < nodes.length; x++) {
      if (nodes[x].nodeType !== 1) {
        continue;
      }
      position++;
      lines += markerFor(position) + nodes[x].innerHTML + "\n";
    }
    return lines;
  }

  return {
    br: function (data) {
      return '\n\n';
    },
    h1: function (data) {
      return '# ';
    },
    h2: function (data) {
      return '## ';
    },
    h3: function (data) {
      return '### ';
    },
    hr: function (data) {
      return '---\n';
    },
    blockquote: function (data) {
      return '> ';
    },
    // Images become ![alt](src); a missing alt or src is left empty.
    img: function (data) {
      return "![" + attribute(data.curEl, 'alt') + "](" + attribute(data.curEl, 'src') + ")";
    },
    // An anchor without an href has no target, so only its content is kept.
    a: function (data) {
      var href = data.curEl.getAttribute('href');
      if (href === null || href === undefined) {
        return data.html;
      }
      return "[" + data.html + "](" + href + ")";
    },
    ul: function (data) {
      return listLines(data, function () {
        return "- ";
      });
    },
    ol: function (data) {
      return listLines(data, function (position) {
        return position + ". ";
      });
    },
    strong: function (data) {
      return "**" + data.html + "**";
    },
    i: function (data) {
      return "*" + data.html + "*";
    }
  };
})();
/**
 * Converts one node to Markdown and appends the result to the shared
 * `markdown` string.
 *
 * @param {Node} curEl - The node being converted.
 * @param {string|undefined} html - The node's innerHTML.
 * @param {string|undefined} tag - The node's tagName; undefined for nodes
 *   that are not elements (e.g. text nodes).
 * @param {number} i - Index of the node in `childnodes`.
 */
function convertToMarkdown(curEl, html, tag, i) {
  // Nodes without a tag name (text nodes) are copied through as plain text.
  if (tag === undefined || tag === null) {
    markdown += curEl.textContent;
    return;
  }

  var name = tag.toLowerCase();

  // Elements with no converter of their own fall back to their text content
  // instead of failing on a missing function. The own-property check keeps
  // inherited Object members (e.g. "constructor") from being called.
  if (!Object.prototype.hasOwnProperty.call(conversions, name)) {
    markdown += curEl.textContent;
    return;
  }

  var converted = conversions[name]({
    curEl: curEl,
    html: html,
    tag: name,
    i: i
  });

  // These converters already include the element's content in what they
  // return; every other converter returns a prefix that the content follows.
  var includesContent = ['ul', 'ol', 'i', 'strong', 'a'].indexOf(name) > -1;
  markdown += converted + (includesContent ? '' : html);
}
// Convert the nodes in `childnodes` one by one, using a node count taken
// once before the loop, then print the resulting Markdown.
var length = childnodes.length;
var i = 0;
for (; i < length; i += 1) {
  var curEl = childnodes[i];
  var html = curEl.innerHTML;
  var tag = curEl.tagName;
  convertToMarkdown(curEl, html, tag, i);
}
console.log(markdown);
HTML to Markdown converter
I've made a simple HTML→Markdown converter in JavaScript and am looking for any feedback. For now, I've used Stack Exchange's /editing-help page as a guide to which elements to convert, but I might look at the CommonMark spec later on.
It parses the HTML string with DOMParser() and then walks the top-level child nodes of the parsed body, converting each one to Markdown in turn.
My test HTML string right now is:
<h1>h1</h1>
<br>
<h2>h2</h2>
<br>
<h3>h3</h3>
<br>text outside everything
<br>
<h2>(and another element!)</h2>
<br>
<img src='http://example.com/example.png'>
<br><a href='http://google.com'>a link!</a>
<br>
<ul>
<li>item 1</li>
<li>item 2</li>
<li>item 3</li>
</ul>
<br>
<ol>
<li>item 1</li>
<li>item 2</li>
<li>item 3</li>
</ol>
<br><strong>BOLD TEXT</strong> and <i>ITALICISED TEXT</i>
<br>
<blockquote>blockquote</blockquote>
<br>
and that conversion 'works':
# h1
## h2
### h3
text outside everything
## (and another element!)

[a link!](http://google.com)
- item 1
- item 2
- item 3
1. item 1
2. item 2
3. item 3
**BOLD TEXT** and *ITALICISED TEXT*
> blockquote
Code
// Sample HTML fed to the converter, built as a single concatenation so the
// fragments read top to bottom in the order they are converted.
var str =
  "<h1>h1</h1> <br>" +
  "<h2>h2</h2> <br>" +
  "<h3>h3</h3> <br>" +
  "text outside everything <br>" +
  "<h2>(and another element!)</h2> <br>" +
  "<img src='http://example.com/example.png'> <br>" +
  "<a href='http://google.com'>a link!</a> <br>" +
  "<ul><li>item 1</li><li>item 2</li><li>item 3</li></ul> <br>" +
  "<ol><li>item 1</li><li>item 2</li><li>item 3</li></ol> <br>" +
  "<strong>BOLD TEXT</strong> and <i>ITALICISED TEXT</i> <br>" +
  "<blockquote>blockquote</blockquote>";

// The sample is parsed as a full HTML document; the converter works on the
// direct children of its body.
var doc = new DOMParser().parseFromString(str, 'text/html');
var childnodes = doc.body.childNodes;

// Output buffer that the converted Markdown is appended to.
var markdown = '';
/**
 * Per-tag Markdown converters, keyed by lower-case tag name.
 *
 * Every converter receives `data` = { curEl, html, tag, i } and returns a
 * Markdown fragment. `a`, `ul`, `ol`, `strong` and `i` return the complete
 * Markdown for the element; the remaining converters return only a prefix
 * (or, for `img`, the whole image) and the caller appends the element's
 * innerHTML after it.
 */
var conversions = (function () {
  // Reads an attribute, treating a missing one as an empty string.
  function attribute(el, name) {
    var value = el.getAttribute(name);
    return value === null || value === undefined ? '' : value;
  }

  // Builds one Markdown line per element child of the list.
  // Text and comment nodes between the items have no innerHTML and would
  // otherwise be rendered as "undefined", so they are skipped.
  function listLines(data, markerFor) {
    // `data.curEl` is the list element itself; the index lookup is kept for
    // callers that only supply `i`.
    var listEl = data.curEl || childnodes[data.i];
    var nodes = listEl.childNodes;
    var lines = '';
    var position = 0;
    for (var x = 0; x < nodes.length; x++) {
      if (nodes[x].nodeType !== 1) {
        continue;
      }
      position++;
      lines += markerFor(position) + nodes[x].innerHTML + "\n";
    }
    return lines;
  }

  return {
    br: function (data) {
      return '\n\n';
    },
    h1: function (data) {
      return '# ';
    },
    h2: function (data) {
      return '## ';
    },
    h3: function (data) {
      return '### ';
    },
    hr: function (data) {
      return '---\n';
    },
    blockquote: function (data) {
      return '> ';
    },
    // Images become ![alt](src); a missing alt or src is left empty.
    img: function (data) {
      return "![" + attribute(data.curEl, 'alt') + "](" + attribute(data.curEl, 'src') + ")";
    },
    // An anchor without an href has no target, so only its content is kept.
    a: function (data) {
      var href = data.curEl.getAttribute('href');
      if (href === null || href === undefined) {
        return data.html;
      }
      return "[" + data.html + "](" + href + ")";
    },
    ul: function (data) {
      return listLines(data, function () {
        return "- ";
      });
    },
    ol: function (data) {
      return listLines(data, function (position) {
        return position + ". ";
      });
    },
    strong: function (data) {
      return "**" + data.html + "**";
    },
    i: function (data) {
      return "*" + data.html + "*";
    }
  };
})();
/**
 * Converts one node to Markdown and appends the result to the shared
 * `markdown` string.
 *
 * @param {Node} curEl - The node being converted.
 * @param {string|undefined} html - The node's innerHTML.
 * @param {string|undefined} tag - The node's tagName; undefined for nodes
 *   that are not elements (e.g. text nodes).
 * @param {number} i - Index of the node in `childnodes`.
 */
function convertToMarkdown(curEl, html, tag, i) {
  // Nodes without a tag name (text nodes) are copied through as plain text.
  if (tag === undefined || tag === null) {
    markdown += curEl.textContent;
    return;
  }

  var name = tag.toLowerCase();

  // Elements with no converter of their own fall back to their text content
  // instead of failing on a missing function. The own-property check keeps
  // inherited Object members (e.g. "constructor") from being called.
  if (!Object.prototype.hasOwnProperty.call(conversions, name)) {
    markdown += curEl.textContent;
    return;
  }

  var converted = conversions[name]({
    curEl: curEl,
    html: html,
    tag: name,
    i: i
  });

  // These converters already include the element's content in what they
  // return; every other converter returns a prefix that the content follows.
  var includesContent = ['ul', 'ol', 'i', 'strong', 'a'].indexOf(name) > -1;
  markdown += converted + (includesContent ? '' : html);
}
// Run the converter over each entry of `childnodes` in order; the finished
// Markdown is written to the console afterwards.
var i, curEl, html, tag;
for (i = 0; i < childnodes.length; i += 1) {
  curEl = childnodes[i];
  html = curEl.innerHTML;
  tag = curEl.tagName;
  convertToMarkdown(curEl, html, tag, i);
}
console.log(markdown);
(you can check the output yourself in the console)
Main Questions:
- Is my code readable? How can I make it more so?
- Is there a cleaner way to do any part of this?
default