I would encourage you to produce more explicit output, particularly with the filenames. If I wanted to reverse the process and scrape the code into files on my machine, using a Python script such as the following…

import json
import re

import requests
from lxml import html

# Convention: the post names each file in a bold paragraph immediately
# before its code block, e.g. <p><strong>Foo.java</strong></p>.
FILENAME_HINT_XPATH = "../preceding-sibling::p[1]/strong/text()"

def code_for_post(site, post):
    # Fetch the post body as HTML via the Stack Exchange API.
    r = requests.get('https://api.stackexchange.com/2.1/posts/{1}?site={0}&filter=withbody'.format(site, post))
    j = json.loads(r.text)
    body = j['items'][0]['body']
    tree = html.fromstring(body)

    # Keep only code blocks that carry a filename hint, keyed by that hint.
    code_elements = tree.xpath("//pre/code[%s]" % FILENAME_HINT_XPATH)
    return {c.xpath(FILENAME_HINT_XPATH)[0]: c.findtext(".") for c in code_elements}

def write_files(code):
    extension = '.java'     # <-- Yuck, due to @Simon.
    for filename_hint, content in code.items():
        filename = re.sub(r'[^A-Za-z0-9]', '', filename_hint) + extension
        with open(filename, 'w') as f:
            print(content, file=f)

write_files(code_for_post('codereview', 41198))

… then I would have to make assumptions about the filename extension.
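To make "more explicit output" concrete: the natural format to emit would be Markdown that puts each filename in a bold paragraph directly before its code block, since that is exactly the structure FILENAME_HINT_XPATH above looks for. A minimal sketch, using a hypothetical format_for_posting helper over the same {filename: content} dictionary (not part of the original program):

def format_for_posting(files):
    # 'files' maps filename -> source text, as in code_for_post() above.
    chunks = []
    for filename, content in sorted(files.items()):
        # Bold filename paragraph, then the code indented four spaces:
        # Markdown that renders as <p><strong>...</strong></p> + <pre><code>.
        indented = '\n'.join('    ' + line for line in content.splitlines())
        chunks.append('**{0}**\n\n{1}'.format(filename, indented))
    return '\n\n'.join(chunks)

Output in that shape would round-trip: the scraper above could recover both names and contents, and provided the bold hint carries the full filename, extension and all, the hard-coded .java hack in write_files disappears.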


The invocation method could be improved. Instead of hard-coding a particular directory to look in for the source files, I would suggest…

  • If files are explicitly passed to the program as command-line arguments, use those files.
  • If a directory is specified, then use all files contained therein, excluding files with significant non-ASCII content.
  • If no command-line arguments are used, then operate on the current directory.

It would be nice to be able to say java ReviewPreparer *.java | pbcopy.
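Here is a rough sketch of that fallback logic, written in Python for consistency with the script above; the looks_like_source helper and its 5% threshold are illustrative assumptions, standing in for the non-ASCII check described in the list:

import os
import sys

def looks_like_source(path, threshold=0.05):
    # Assumed heuristic for "significant non-ASCII content": sample the
    # first 4 KiB and reject files where too many bytes are non-ASCII.
    with open(path, 'rb') as f:
        data = f.read(4096)
    if not data:
        return False
    return sum(1 for b in data if b > 127) / len(data) <= threshold

def input_files(args):
    if not args:
        args = ['.']            # no arguments: operate on the current directory
    files = []
    for arg in args:
        if os.path.isdir(arg):  # a directory: take its files, filtered
            for name in sorted(os.listdir(arg)):
                path = os.path.join(arg, name)
                if os.path.isfile(path) and looks_like_source(path):
                    files.append(path)
        else:
            files.append(arg)   # explicitly named files are used as-is
    return files

print('\n'.join(input_files(sys.argv[1:])))

With that behaviour, the pbcopy one-liner falls out of the first case for free: the shell expands *.java into explicit file arguments.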
