python batch proccessing Batch processing large text files for sentiment analysis

So I have written this script (pastebin . com/NJgLW8xb) in Python 2.7 and I was wondering where I could improve it or efficient. This is my first script so idkI don't now much. The script takes a text file from a folder removes blank spaces with list comprehension and then breaks it into smaller 20 elements and saves it each 20 elements as a definition into a dictionary with iterative numbered keys. Each key is then used to fetch the definition from the dictionary and sends each through the apiAPI one by one saving the output into another dictionary. The result dictionary is then converted to text and saved into a text file.

code:

edited body; edited tags

Source Link

edited Aug 9, 2018 at 16:58

Ziyad

13
3

so iSo I have written this script (pastebin . com/NJgLW8xb) in pythonPython 2.7 and iI was wondering where iI could improve it or efficient. thisThis is my first script so idk much. theThe script takes a text file from a folder removes blank spaces with list comprehension and then breaks it into smaller 20 elements and saves it each 20 elements as a definition into a dictionary with iterative numbered keys. eachEach key is then used to fetch the definition from the dictionary and sends each through the api one by one saving the output into another dictionary. theThe result dictionary is then converted to text and saved into a text file.

added 161 characters in body

Source Link

edited Aug 9, 2018 at 16:54

Ziyad

13
3

import glob
import os
import json
import paralleldots

api_key = "iyuDsKbgZSsCxVj6Moe37MzdqCUuvkpf33t6qS3X3cH"
paralleldots.set_api_key(api_key)

output = input(r"C:\Users\User\Desktop\trial")
txt_files = os.path.join("D:\\english\\stou\\check", '*.txt')

dic = {}                                #dictionary for storing list elements
res = {}                                #results dictionary for api output

def for_high (list_e):                  #defining a function for taking list elements and putting them into dic
    no_keys = list_e / 20
    dic = {
        1: l[1:21]
    }
    i = 1
    start_key_value = 21
    while i <= no_keys:
        i += 1
        end_key_value = start_key_value + 20
        dic.update({i: ''.join(l[start_key_value:end_key_value])})
        start_key_value += 20
    for x in dic:                       #creating a for loop for getting and saving the output and creating a new file
        output = paralleldots.emotion(dic.get(x))
        res.update({x: output})
        with open(os.path.join("C:\\Users\\User\\Desktop\\trial", filename), 'w') as text_file:
            text_file.write(json.dumps(res))

for txt_file in glob.glob(txt_files): # for loop for going through all the text files in the input directory with open(txt_file, "r") as input_file: filename = os.path.splitext(os.path.basename(txt_file))[0] + '.txt' l = [l for l in input_file.readlines() if l.strip()] list_e = int(len(l)) #no. of list elements variable


for txt_file in glob.glob(txt_files):     # for loop for going through all the text files in the input directory
    with open(txt_file, "r") as input_file:
        filename = os.path.splitext(os.path.basename(txt_file))[0] + '.txt'
        l = [l for l in input_file.readlines() if l.strip()]
        list_e = int(len(l))            #no. of list elements variable

    if list_e > 20:                     #checking if list has more than 20 elements
        if list_e % 2 != 0:             #checking if list has an odd no. of elements
            list_e += 1
            for_high(list_e)

        else:
            for_high(list_e)
    else:
        in_txt = paralleldots.emotion(l)
        filename = os.path.splitext(os.path.basename(txt_file))[0] + '.txt'
        with open(os.path.join("C:\\Users\\User\\Desktop\\trial", filename), 'w') as text_file:
            text_file.write(str(in_txt))

import glob
import os
import json
import paralleldots

api_key = "iyuDsKbgZSsCxVj6Moe37MzdqCUuvkpf33t6qS3X3cH"
paralleldots.set_api_key(api_key)

output = input(r"C:\Users\User\Desktop\trial")
txt_files = os.path.join("D:\\english\\stou\\check", '*.txt')

dic = {}                                #dictionary for storing list elements
res = {}                                #results dictionary for api output

def for_high (list_e):                  #defining a function for taking list elements and putting them into dic
 no_keys = list_e / 20
 dic = {
    1: l[1:21]
}
i = 1
start_key_value = 21
while i <= no_keys:
    i += 1
    end_key_value = start_key_value + 20
    dic.update({i: ''.join(l[start_key_value:end_key_value])})
    start_key_value += 20
for x in dic:                       #creating a for loop for getting and saving the output and creating a new file
    output = paralleldots.emotion(dic.get(x))
    res.update({x: output})
    with open(os.path.join("C:\\Users\\User\\Desktop\\trial", filename), 'w') as text_file:
        text_file.write(json.dumps(res))

for txt_file in glob.glob(txt_files): # for loop for going through all the text files in the input directory with open(txt_file, "r") as input_file: filename = os.path.splitext(os.path.basename(txt_file))[0] + '.txt' l = [l for l in input_file.readlines() if l.strip()] list_e = int(len(l)) #no. of list elements variable

if list_e > 20:                     #checking if list has more than 20 elements
    if list_e % 2 != 0:             #checking if list has an odd no. of elements
        list_e += 1
        for_high(list_e)

    else:
        for_high(list_e)
else:
    in_txt = paralleldots.emotion(l)
    filename = os.path.splitext(os.path.basename(txt_file))[0] + '.txt'
    with open(os.path.join("C:\\Users\\User\\Desktop\\trial", filename), 'w') as text_file:
        text_file.write(str(in_txt))

import glob
import os
import json
import paralleldots

api_key = "iyuDsKbgZSsCxVj6Moe37MzdqCUuvkpf33t6qS3X3cH"
paralleldots.set_api_key(api_key)

output = input(r"C:\Users\User\Desktop\trial")
txt_files = os.path.join("D:\\english\\stou\\check", '*.txt')

dic = {}                                #dictionary for storing list elements
res = {}                                #results dictionary for api output

def for_high (list_e):                  #defining a function for taking list elements and putting them into dic
    no_keys = list_e / 20
    dic = {
        1: l[1:21]
    }
    i = 1
    start_key_value = 21
    while i <= no_keys:
        i += 1
        end_key_value = start_key_value + 20
        dic.update({i: ''.join(l[start_key_value:end_key_value])})
        start_key_value += 20
    for x in dic:                       #creating a for loop for getting and saving the output and creating a new file
        output = paralleldots.emotion(dic.get(x))
        res.update({x: output})
        with open(os.path.join("C:\\Users\\User\\Desktop\\trial", filename), 'w') as text_file:
            text_file.write(json.dumps(res))

for txt_file in glob.glob(txt_files):     # for loop for going through all the text files in the input directory
    with open(txt_file, "r") as input_file:
        filename = os.path.splitext(os.path.basename(txt_file))[0] + '.txt'
        l = [l for l in input_file.readlines() if l.strip()]
        list_e = int(len(l))            #no. of list elements variable

    if list_e > 20:                     #checking if list has more than 20 elements
        if list_e % 2 != 0:             #checking if list has an odd no. of elements
            list_e += 1
            for_high(list_e)

        else:
            for_high(list_e)
    else:
        in_txt = paralleldots.emotion(l)
        filename = os.path.splitext(os.path.basename(txt_file))[0] + '.txt'
        with open(os.path.join("C:\\Users\\User\\Desktop\\trial", filename), 'w') as text_file:
            text_file.write(str(in_txt))

Source Link

asked Aug 9, 2018 at 16:48

Ziyad

13
3

Loading

Stack Exchange Network

Return to Question

python batch proccessing Batch processing large text files for sentiment analysis

python batch proccessing large text files for sentiment analysis

Batch processing large text files for sentiment analysis