Not only are you reading the whole of each file into memory, but also you laboriously replicate the information in a table called list_of_lines
.
You have a secondary problem: your choices of variable names severely obfuscate what you are doing.
Here is your script rewritten with the readlines() caper removed and with meaningful names:
file_A1_B1 = open("A1_B1_100000.txt", "r")
file_A2_B2 = open("A2_B2_100000.txt", "r")
file_A1_B2 = open("A1_B2_100000.txt", "r")
file_A2_B1 = open("A2_B1_100000.txt", "r")
file_write = open ("average_generations.txt", "w")
mutation_average = open("mutation_average", "w") # not used
files = [file_A2_B2,file_A2_B2,file_A1_B2,file_A2_B1]
for afile in files:
table = []
for aline in afile:
values = aline.split('\t')
values.remove('\n') # why?
table.append(values)
row_count = len(table)
row0length = len(table[0])
print_counter = 4
for column_index in range(row0length):
column_total = 0
for row_index in range(row_count):
number = float(table[row_index][column_index])
column_total = column_total + number
column_average = column_total/row_count
print column_average
if print_counter == 4:
file_write.write(str(column_average)+'\n')
print_counter = 0
print_counter +=1
file_write.write('\n')
It rapidly becomes apparent that (1) you are calculating column averages (2) the obfuscation led some others to think you were calculating row averages.
As you are calculating column averages, no output is required until the end of each file, and the amount of extra memory actually required is proportional to the number of columns.
Here is a revised version of the outer loop code:
for afile in files:
for row_count, aline in enumerate(afile, start=1):
values = aline.split('\t')
values.remove('\n') # why?
fvalues = map(float, values)
if row_count == 1:
row0length = len(fvalues)
column_index_range = range(row0length)
column_totals = fvalues
else:
assert len(fvalues) == row0length
for column_index in column_index_range:
column_totals[column_index] += fvalues[column_index]
print_counter = 4
for column_index in column_index_range:
column_average = column_totals[column_index] / row_count
print column_average
if print_counter == 4:
file_write.write(str(column_average)+'\n')
print_counter = 0
print_counter +=1