# Spaces: Sleeping / Sleeping  (appears to be web-page header residue, not part of the script)
"""One-off patch script: add a ``_parse_hashtags`` method to the GNN pipeline.

Reads ``Dataset/twitter/gnn_pipeline.py``, locates the line containing
``return self.cluster_id_map[c]`` (the marker used for the end of
``_get_cluster_idx``) and inserts the new method right after that line.

Exit status is 0 when the method was added (or was already present) and 1
when the insertion marker could not be found.
"""

import sys
import textwrap

# File that gets patched in place.
TARGET_PATH = 'Dataset/twitter/gnn_pipeline.py'

# Text that identifies the line to insert after.  It is matched as a plain
# substring, so the single leading space matches any amount of indentation
# in front of ``return``.
INSERT_MARKER = " return self.cluster_id_map[c]"

# Indentation applied to the inserted method.
# NOTE(review): assumes methods of the target class sit at one 4-space
# level -- confirm against gnn_pipeline.py.
METHOD_INDENT = "    "

# Source of the method, written at column 0 and indented on insertion.
# It references ``np``, so the target module is presumed to import numpy
# under that name -- verify.  Every float (NaN or not) yields [], therefore
# a single check is enough and no NaN test is required; ``str.split()``
# already returns [] for empty or blank strings.
METHOD_SOURCE = '''
def _parse_hashtags(self, hlist):
    """
    Safely parse hashtag list from various formats.
    Returns list of hashtags or empty list if parsing fails.
    Handles:
    - None values
    - NaN/float NaN
    - Strings (splits by whitespace)
    - Lists/tuples/sets
    - NumPy arrays
    """
    # Handle None
    if hlist is None:
        return []
    # Handle NaN/float NaN: a float never carries hashtags
    if isinstance(hlist, float):
        return []
    # Handle string - split by whitespace (blank strings give [])
    if isinstance(hlist, str):
        return hlist.split()
    # Handle list/tuple/set
    if isinstance(hlist, (list, tuple, set)):
        return list(hlist)
    # Handle numpy arrays
    if isinstance(hlist, np.ndarray):
        return hlist.tolist()
    # Fallback: return empty
    return []
'''

# The exact text that is spliced into the file.  ``textwrap.indent`` leaves
# blank lines untouched, so the leading newline still produces one empty
# separator line before the new ``def``.
METHOD_SNIPPET = textwrap.indent(METHOD_SOURCE, METHOD_INDENT)


def insert_after_marker(content, marker, snippet):
    """Insert *snippet* after the first line of *content* containing *marker*.

    Args:
        content: Full text of the file being patched.
        marker: Substring identifying the line to insert after.
        snippet: Text to insert; expected to end with a newline.

    Returns:
        The patched text, or ``None`` when *marker* does not occur.
    """
    pos = content.find(marker)
    if pos == -1:
        return None
    line_end = content.find('\n', pos)
    if line_end == -1:
        # The marker is on the last, unterminated line.  Terminate that line
        # and append; slicing with -1 + 1 == 0 would otherwise put the
        # snippet at the very top of the file.
        return content + '\n' + snippet
    return content[:line_end + 1] + snippet + content[line_end + 1:]


def main():
    """Patch ``TARGET_PATH`` in place and return the process exit status."""
    # Python source is UTF-8 by default, so do not depend on the locale.
    with open(TARGET_PATH, 'r', encoding='utf-8') as f:
        content = f.read()

    # Running the script twice must not add a second copy of the method.
    if "def _parse_hashtags(" in content:
        print("_parse_hashtags already present; nothing to do")
        return 0

    new_content = insert_after_marker(content, INSERT_MARKER, METHOD_SNIPPET)
    # NOTE(review): the leading "β" in the messages below looks like a
    # mis-encoded status glyph; it is kept as found -- confirm the intended
    # character.
    if new_content is None:
        print("β Could not find _get_cluster_idx method")
        return 1

    with open(TARGET_PATH, 'w', encoding='utf-8') as f:
        f.write(new_content)
    print("β Successfully added _parse_hashtags method!")
    return 0


if __name__ == "__main__":
    sys.exit(main())