apolinario committed on
Commit
e306cd2
·
1 Parent(s): 7615b9a

Attempt to use the API

Browse files
Files changed (1) hide show
  1. ui/src/app/api/hf-jobs/route.ts +33 -69
ui/src/app/api/hf-jobs/route.ts CHANGED
@@ -1033,81 +1033,45 @@ async function checkHFJobStatus(token: string, jobId: string, jobNamespace?: str
1033
  }
1034
 
1035
  async function checkHFJobsCapacity(token: string): Promise<any> {
1036
- return new Promise(async (resolve, reject) => {
1037
- console.log('Checking HF Jobs capacity for namespace: lora-training-frenzi');
1038
-
1039
- // Create a temporary file to store the output
1040
- const tempFile = path.join(tmpdir(), `hf_jobs_ps_${Date.now()}.txt`);
1041
- console.log(`Writing output to temp file: ${tempFile}`);
1042
-
1043
- // Use shell redirection to write to file
1044
- const command = `hf jobs ps --namespace lora-training-frenzi --token "${token}" > "${tempFile}" 2>&1`;
1045
 
1046
- const childProcess = spawn('sh', ['-c', command], {
1047
- env: {
1048
- ...process.env,
1049
- HF_TOKEN: token,
1050
- TERM: 'dumb',
1051
- NO_COLOR: '1',
1052
- }
1053
  });
1054
 
1055
- childProcess.on('close', async (code) => {
1056
- console.log(`hf jobs ps process exited with code: ${code}`);
1057
-
1058
- try {
1059
- // Read the output from the temporary file
1060
- const output = await readFile(tempFile, 'utf-8');
1061
-
1062
- console.log('=== RAW OUTPUT START ===');
1063
- console.log(output);
1064
- console.log('=== RAW OUTPUT END ===');
1065
-
1066
- // Count RUNNING jobs in the output
1067
- // Split by newline and filter out empty lines
1068
- const lines = output.split(/\r?\n/).filter(line => line.trim().length > 0);
1069
- let runningCount = 0;
1070
-
1071
- console.log(`Total non-empty lines in output: ${lines.length}`);
1072
-
1073
- for (let i = 0; i < lines.length; i++) {
1074
- const line = lines[i];
1075
- console.log(`Line ${i}: "${line}"`);
1076
-
1077
- // Check if line contains RUNNING (case-sensitive as shown in your output)
1078
- if (line.includes('RUNNING')) {
1079
- runningCount++;
1080
- console.log(` ✓ Line ${i} contains RUNNING (count: ${runningCount})`);
1081
- }
1082
- }
1083
 
1084
- const atCapacity = runningCount >= 32;
 
1085
 
1086
- console.log(`\n=== FINAL COUNT ===`);
1087
- console.log(`Found ${runningCount} RUNNING jobs. At capacity: ${atCapacity}`);
1088
- console.log(`==================\n`);
 
 
 
 
 
1089
 
1090
- // Clean up temp file
1091
- try {
1092
- await unlink(tempFile);
1093
- } catch (unlinkError) {
1094
- console.warn('Failed to delete temp file:', unlinkError);
1095
- }
1096
 
1097
- resolve({
1098
- runningJobs: runningCount,
1099
- atCapacity,
1100
- capacityLimit: 32,
1101
- });
1102
- } catch (parseError: any) {
1103
- console.error('Failed to read or parse jobs ps output:', parseError);
1104
- reject(new Error('Failed to parse capacity status'));
1105
- }
1106
- });
1107
 
1108
- childProcess.on('error', (err) => {
1109
- console.error('HF Jobs ps process error:', err);
1110
- reject(new Error(`Process error: ${err.message}`));
1111
- });
1112
- });
 
 
 
 
1113
  }
 
1033
  }
1034
 
1035
  async function checkHFJobsCapacity(token: string): Promise<any> {
1036
+ try {
1037
+ console.log('Checking HF Jobs capacity for namespace: lora-training-frenzi via API');
 
 
 
 
 
 
 
1038
 
1039
+ // Use HuggingFace API directly instead of CLI to avoid TTY issues
1040
+ const response = await fetch('https://huggingface.co/api/jobs/lora-training-frenzi', {
1041
+ headers: {
1042
+ 'Authorization': `Bearer ${token}`,
1043
+ },
 
 
1044
  });
1045
 
1046
+ if (!response.ok) {
1047
+ throw new Error(`API request failed: ${response.status} ${response.statusText}`);
1048
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1049
 
1050
+ const jobs = await response.json();
1051
+ console.log(`Fetched ${jobs.length} total jobs from API`);
1052
 
1053
+ // Count jobs with status RUNNING
1054
+ let runningCount = 0;
1055
+ for (const job of jobs) {
1056
+ const status = job.status?.stage || job.status;
1057
+ if (status === 'RUNNING') {
1058
+ runningCount++;
1059
+ }
1060
+ }
1061
 
1062
+ const atCapacity = runningCount >= 32;
 
 
 
 
 
1063
 
1064
+ console.log(`\n=== FINAL COUNT ===`);
1065
+ console.log(`Found ${runningCount} RUNNING jobs. At capacity: ${atCapacity}`);
1066
+ console.log(`==================\n`);
 
 
 
 
 
 
 
1067
 
1068
+ return {
1069
+ runningJobs: runningCount,
1070
+ atCapacity,
1071
+ capacityLimit: 32,
1072
+ };
1073
+ } catch (error: any) {
1074
+ console.error('Failed to check capacity via API:', error);
1075
+ throw new Error(`Failed to check capacity: ${error.message}`);
1076
+ }
1077
  }