To receive notifications about scheduled maintenance, please subscribe to the mailing-list gitlab-operations@sympa.ethz.ch. You can subscribe to the mailing-list at https://sympa.ethz.ch

aggr.py 910 Bytes
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import logging
import csv
import pandas as pd
from .utils import logging as logutils

# Input files used when main() is called without arguments.
DEFAULT_CLUSTERS_SUMMARY_CSV_FILE = "clusters.csv"
DEFAULT_CX_COURSE_STUDENTS_CSV_FILE = "cx_students.csv"


def main(
    clusters_summary_csv_file=DEFAULT_CLUSTERS_SUMMARY_CSV_FILE,
    cx_course_students_csv_file=DEFAULT_CX_COURSE_STUDENTS_CSV_FILE):
  """Print the distinct ids found in each cluster of the summary CSV.

  Rows of the clusters CSV are grouped by their "cluster_id" column. For
  every group the "id1" and "id2" columns are stacked (all id1 values
  first, then all id2 values) and the de-duplicated values are printed,
  one line of output per cluster.

  Args:
    clusters_summary_csv_file: Handed to pd.read_csv; the resulting table
      must provide the columns "cluster_id", "id1" and "id2".
    cx_course_students_csv_file: Handed to pd.read_csv; the file is parsed
      but its contents are not used by this function.
  """
  summary = pd.read_csv(clusters_summary_csv_file)
  # NOTE(review): the students table is loaded but never consulted below.
  # The read is kept so that a missing or unparsable students file still
  # raises exactly as it did before.
  _students = pd.read_csv(cx_course_students_csv_file)

  for _, rows in summary.groupby("cluster_id"):
    stacked_ids = pd.concat([rows["id1"], rows["id2"]])
    print(stacked_ids.unique())

# Script entry point: call main() with its default CSV file names.
# NOTE(review): this module contains a relative import (from .utils ...),
# so it presumably has to be started as a module inside its package
# (python -m <package>.aggr) rather than by file path -- confirm.
if __name__ == "__main__":
  main()