Commit a6191d08 authored by pleasure2cu
Browse files

change filter option

parent 6b4e10b0
......@@ -89,7 +89,7 @@ def main():
tfidf_matrix_file_name = 'tfidf_matrix.npz'
if tfidf_matrix_file_name not in os.listdir('./') or csv_file_name not in os.listdir('./'):
- paper_ids, titles, abstracts, bodies = get_all_data(filter_non_english=False)
+ paper_ids, titles, abstracts, bodies = get_all_data(filter_non_english=True)
tfidf_matrix = get_tfidf_matrix([a + " " + b for a, b in zip(abstracts, bodies)])
# save important data for future runs
with open(csv_file_name, 'w', encoding='utf-8') as f:
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment